Guessing of content type for attachments in EmlFileProvider #128
-
|
First of all, thanks for making faker-file. It has been helpful in testing our text extraction tools. I noticed the commented-out code at https://github.com/barseghyanartur/faker-file/blob/0.18.2/src/faker_file/providers/eml_file.py#L247 and https://github.com/barseghyanartur/faker-file/blob/0.18.2/src/faker_file/providers/eml_file.py#L275. Is there a reason this wasn't working with the guessed content type? I uncommented it for my own very limited tests and it seemed to work. |
Beta Was this translation helpful? Give feedback.
Replies: 1 comment 6 replies
-
|
Thanks for reporting this. It didn't work well for all providers right away, so the fix is a work in progress in the Correct-guess-type branch. I think it works well now.

Filename: faker_file_guess_mime_maintype_subtype.py

from pathlib import Path
from faker import Faker
from faker_file.providers.bin_file import BinFileProvider
from faker_file.providers.bmp_file import BmpFileProvider
from faker_file.providers.csv_file import CsvFileProvider
from faker_file.providers.docx_file import DocxFileProvider
from faker_file.providers.eml_file import EmlFileProvider
from faker_file.providers.epub_file import EpubFileProvider
from faker_file.providers.gif_file import GifFileProvider
from faker_file.providers.ico_file import (
GraphicIcoFileProvider,
IcoFileProvider,
)
from faker_file.providers.jpeg_file import (
GraphicJpegFileProvider,
JpegFileProvider,
)
from faker_file.providers.mp3_file import Mp3FileProvider
from faker_file.providers.odp_file import OdpFileProvider
from faker_file.providers.ods_file import OdsFileProvider
from faker_file.providers.odt_file import OdtFileProvider
from faker_file.providers.pdf_file import (
GraphicPdfFileProvider,
PdfFileProvider,
)
from faker_file.providers.png_file import (
GraphicPngFileProvider,
PngFileProvider,
)
from faker_file.providers.pptx_file import PptxFileProvider
from faker_file.providers.rtf_file import RtfFileProvider
from faker_file.providers.svg_file import SvgFileProvider
from faker_file.providers.tar_file import TarFileProvider
from faker_file.providers.tiff_file import TiffFileProvider
from faker_file.providers.txt_file import TxtFileProvider
from faker_file.providers.webp_file import (
GraphicWebpFileProvider,
WebpFileProvider,
)
from faker_file.providers.xlsx_file import XlsxFileProvider
from faker_file.providers.zip_file import ZipFileProvider
from faker_file.registry import FILE_REGISTRY
from faker_file.helpers import get_mime_maintype_subtype
# Single Faker instance shared by the rest of the script.
FAKER = Faker()

# Register every file provider on FAKER. The tuple keeps the exact
# registration order of the original one-call-per-line version.
for provider_cls in (
    BinFileProvider,
    BmpFileProvider,
    CsvFileProvider,
    DocxFileProvider,
    EmlFileProvider,
    EpubFileProvider,
    GifFileProvider,
    GraphicIcoFileProvider,
    GraphicJpegFileProvider,
    GraphicPdfFileProvider,
    GraphicPngFileProvider,
    GraphicWebpFileProvider,
    IcoFileProvider,
    JpegFileProvider,
    Mp3FileProvider,
    OdpFileProvider,
    OdsFileProvider,
    OdtFileProvider,
    PdfFileProvider,
    PngFileProvider,
    PptxFileProvider,
    RtfFileProvider,
    SvgFileProvider,
    TarFileProvider,
    TiffFileProvider,
    TxtFileProvider,
    WebpFileProvider,
    XlsxFileProvider,
    ZipFileProvider,
):
    FAKER.add_provider(provider_cls)
# Call each provider method registered on FAKER exactly once, keeping every
# result under its own module-level name. The trailing "# N - ..." notes are
# the original author's running tally and are left as written.
bin_file = FAKER.bin_file() # 1 - 1
bmp_file = FAKER.bmp_file() # 2 - 1
csv_file = FAKER.csv_file() # 3 - 1
docx_file = FAKER.docx_file() # 4 - 1
eml_file = FAKER.eml_file() # 5 - 1
epub_file = FAKER.epub_file() # 6 - 1
gif_file = FAKER.gif_file() # 7 - 1
graphic_ico_file = FAKER.graphic_ico_file() # 8 - 2
graphic_jpeg_file = FAKER.graphic_jpeg_file() # 9 - 2
graphic_pdf_file = FAKER.graphic_pdf_file() # 10 - 2
graphic_png_file = FAKER.graphic_png_file() # 11 - 2
graphic_webp_file = FAKER.graphic_webp_file() # 12 - 2
ico_file = FAKER.ico_file() # 13 - dup
jpeg_file = FAKER.jpeg_file() # 14 - dup
mp3_file = FAKER.mp3_file() # 15 - 1
odp_file = FAKER.odp_file() # 16 - 1
ods_file = FAKER.ods_file() # 17 - 1
odt_file = FAKER.odt_file() # 18 - 1
pdf_file = FAKER.pdf_file() # 19 - dup
png_file = FAKER.png_file() # 20 - dup
pptx_file = FAKER.pptx_file() # 21 - 1
rtf_file = FAKER.rtf_file() # 22 - 1
svg_file = FAKER.svg_file() # 23 - 1
tar_file = FAKER.tar_file() # 24 - 1
tiff_file = FAKER.tiff_file() # 25 - 1
txt_file = FAKER.txt_file() # 26 - 1
webp_file = FAKER.webp_file() # 27 - dup
xlsx_file = FAKER.xlsx_file() # 28 - 1
zip_file = FAKER.zip_file() # 29 - 1
# Copy the registry's private ``_registry`` collection into a list.
# NOTE(review): presumably this holds the files created above -- confirm
# against FILE_REGISTRY; ``_registry`` is not a public attribute.
files = list(FILE_REGISTRY._registry)
# Distinct filename suffixes among the collected entries.
unique_exts = {Path(p).suffix for p in files}
# Only the count is printed here; the extensions themselves are not listed.
print(f"Unique extensions ({len(unique_exts)}):")
# Add a literal name that none of the provider calls above produced.
files.append("unknown.bin")
for fname in files:
    main, sub = get_mime_maintype_subtype(fname)
    print(f"{fname!r} : {main}/{sub}")

Output:

Filename: faker_file_eml_file_with_all_types.py

from faker import Faker
from faker_file.providers.eml_file import EmlFileProvider
from faker_file.providers.helpers.inner import (
create_inner_bin_file,
create_inner_csv_file,
create_inner_docx_file,
create_inner_eml_file,
create_inner_epub_file,
create_inner_graphic_ico_file,
create_inner_graphic_jpeg_file,
create_inner_graphic_pdf_file,
create_inner_graphic_png_file,
create_inner_graphic_webp_file,
create_inner_ico_file,
create_inner_jpeg_file,
create_inner_json_file,
create_inner_mp3_file,
create_inner_odp_file,
create_inner_ods_file,
create_inner_odt_file,
create_inner_pdf_file,
create_inner_png_file,
create_inner_pptx_file,
create_inner_rtf_file,
create_inner_svg_file,
create_inner_tar_file,
create_inner_txt_file,
create_inner_webp_file,
create_inner_xlsx_file,
create_inner_xml_file,
create_inner_zip_file,
list_create_inner_file,
)
from faker_file.storages.filesystem import FileSystemStorage
# Faker instance with only the EML provider registered.
FAKER = Faker()
FAKER.add_provider(EmlFileProvider)

# Storage backend handed to every inner-file factory.
STORAGE = FileSystemStorage()

# Shared keyword arguments paired with each inner-file factory.
kwargs = dict(storage=STORAGE, generator=FAKER)
eml_file = FAKER.eml_file(
prefix="zzz",
options={
"count": 10,
"create_inner_file_func": list_create_inner_file,
"create_inner_file_args": {
"func_list": [
(create_inner_bin_file, kwargs),
(create_inner_csv_file, kwargs),
(create_inner_docx_file, kwargs),
(create_inner_eml_file, kwargs),
(create_inner_epub_file, kwargs),
(create_inner_graphic_ico_file, kwargs),
(create_inner_graphic_jpeg_file, kwargs),
(create_inner_graphic_pdf_file, kwargs),
(create_inner_graphic_png_file, kwargs),
(create_inner_graphic_webp_file, kwargs),
(create_inner_ico_file, kwargs),
(create_inner_jpeg_file, kwargs),
(create_inner_json_file, kwargs),
(create_inner_mp3_file, kwargs),
(create_inner_odp_file, kwargs),
(create_inner_ods_file, kwargs),
(create_inner_odt_file, kwargs),
(create_inner_pdf_file, kwargs),
(create_inner_png_file, kwargs),
(create_inner_pptx_file, kwargs),
(create_inner_rtf_file, kwargs),
(create_inner_svg_file, kwargs),
(create_inner_tar_file, kwargs),
(create_inner_txt_file, kwargs),
(create_inner_webp_file, kwargs),
(create_inner_xlsx_file, kwargs),
(create_inner_xml_file, kwargs),
(create_inner_zip_file, kwargs),
],
},
},
)

Output:

Looks correct to me (rename to .eml). Could you test it as well? If it works well, I'll merge into dev -> main and make a release. |
Beta Was this translation helpful? Give feedback.
@martinburchell
Thanks for reporting this.
I think it didn't work quite well right away for all providers. WIP in Correct-guess-type branch.
I think it works well now.
Filename: faker_file_guess_mime_maintype_subtype.py