Skip to content
This repository has been archived by the owner on Feb 22, 2024. It is now read-only.

Commit

Permalink
chore: add msg and json file types (#169)
Browse files: browse the repository at this point in the history
  • Loading branch information
ryannikolaidis authored May 3, 2023
1 parent 15b5d73 commit 2b043ab
Show file tree
Hide file tree
Showing 22 changed files with 390 additions and 83 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 0.10.3

* Add support for json and msg file types

# 0.10.2

* Set black line length to 100
Expand Down
Binary file not shown.
226 changes: 226 additions & 0 deletions test_unstructured_api_tools/api/fixtures/spring-weather.html.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
[
{
"element_id": "41f6e17bf5e9a407fcca74e902f802a0",
"text": "News Around NOAA",
"type": "Title",
"metadata": {
"page_number": 1
}
},
{
"element_id": "aa589c25dc22dcc8a75baba1244e6c8f",
"text": "National Program",
"type": "Title",
"metadata": {
"page_number": 1
}
},
{
"element_id": "62c26d2e16774d2334bd804c7bb6a711",
"text": "Are You Weather-Ready for the Spring?",
"type": "Title",
"metadata": {
"page_number": 1
}
},
{
"element_id": "32709cd3bec72640bbbe32f58e6e23f6",
"text": "Weather.gov >",
"type": "Title",
"metadata": {
"page_number": 1
}
},
{
"element_id": "2661da76db570876b075083aaeeaee55",
"text": "News Around NOAA > Are You Weather-Ready for the Spring?",
"type": "Title",
"metadata": {
"page_number": 1
}
},
{
"element_id": "fab6c4df083f0fb6f324fff65b652c86",
"text": "Weather Safety Air Quality Beach Hazards Cold Cold Water Drought Floods Fog Heat Hurricanes Lightning Safety Rip Currents Safe Boating Space Weather Sun (Ultraviolet Radiation) Thunderstorms & Tornadoes Tornado Tsunami Wildfire Wind Winter",
"type": "ListItem",
"metadata": {
"page_number": 1
}
},
{
"element_id": "45c26cf3457e6d18985a435e2c0fcc65",
"text": "Safety Campaigns Seasonal Safety Campaigns #SafePlaceSelfie Deaf & Hard of Hearing Intellectual Disabilities Spanish-language Content The Great Outdoors",
"type": "ListItem",
"metadata": {
"page_number": 1
}
},
{
"element_id": "77f5acc603de9a165ed87a5c3fbaf14a",
"text": "Ambassador About WRN Ambassadors Become an Ambassador Ambassadors of Excellence People of WRN FAQS Tell Your Success Story Success Stories Tri-fold Aviation Current Ambassadors Brochure En Español",
"type": "ListItem",
"metadata": {
"page_number": 1
}
},
{
"element_id": "8f19bcaabbd1bafa5e9826ac69766c8b",
"text": "Education NWS Education Home Be A Force Of Nature WRN Kids Flyer Wireless Emergency Alerts NOAA Weather Radio Mobile Weather Brochures Hourly Weather Forecast Citizen Science Intellectual Disabilities",
"type": "ListItem",
"metadata": {
"page_number": 1
}
},
{
"element_id": "1245f9cf9e019713391e4ee3bac54a63",
"text": "Collaboration Get Involved Social Media WRN Ambassadors ​ Enterprise Resources StormReady TsunamiReady NWSChat (core partners only) InteractiveNWS (iNWS) (core partners only)​ SKYWARN",
"type": "ListItem",
"metadata": {
"page_number": 1
}
},
{
"element_id": "23dfa7f98424dbf86e00b3d500096dfa",
"text": "News & Events Latest News Calendar Meetings & Workshops NWS Aware Newsletter",
"type": "ListItem",
"metadata": {
"page_number": 1
}
},
{
"element_id": "93202df2ec7081b28b47901b5c287a5a",
"text": "International",
"type": "ListItem",
"metadata": {
"page_number": 1
}
},
{
"element_id": "e53d6a9c615bdf1a8d7b98a67cade488",
"text": "About Contact Us What is WRN? WRN FAQ WRN Brochure Hazard Simplification IDSS Brochure Roadmap Strategic Plan WRN International Social Science",
"type": "ListItem",
"metadata": {
"page_number": 1
}
},
{
"element_id": "6cbcf8c11f8c0781bd9ecc7f67169ff0",
"text": "The spring season is all about change – a rebirth both literally and figuratively. Even though the spring season doesn’t officially (astronomically, that is) begin until March 20 this year, climatologically, it starts March 1.",
"type": "NarrativeText",
"metadata": {
"page_number": 1
}
},
{
"element_id": "7184168da442c6ef28553b274bf2be8f",
"text": "As cold winter nights are replaced by the warmth of longer daylight hours, the National Weather Service invites you to do two important things that may save your life or the life of a loved one.",
"type": "NarrativeText",
"metadata": {
"page_number": 1
}
},
{
"element_id": "f3be9748ecd68b20d706548129baa22d",
"text": "First, take steps to better prepare for the seasonal hazards weather can throw at you.\nThis could include a spring cleaning of your storm shelter or ensuring your emergency kit is fully stocked. Take a look at our infographics and social media posts to help you become “weather-ready.”",
"type": "NarrativeText",
"metadata": {
"page_number": 1
}
},
{
"element_id": "126c3cd201fb259cfeabc6bffc0b5473",
"text": "Second, encourage others to become Weather-Ready as well. Share the message by taking advantage of our vast array of weather safety content – everything posted on our Spring Safety website is freely available, and we encourage sharing on social media networks. Also remember those who are most vulnerable, like an elderly family member or neighbor who might have limited mobility or is isolated. Reach out to those who are at higher risk of being impacted by extreme weather, and help them get prepared. This simple act of caring could become heroic.",
"type": "NarrativeText",
"metadata": {
"page_number": 1
}
},
{
"element_id": "c1944fb037f3e1cb14969bc59a7dd9c2",
"text": "This spring, the campaign is focused on heat dangers. Heat illness and death can occur even in spring’s moderately warm weather. The majority of all heat-related deaths occur outside of heat waves and roughly a third of child hot car deaths occur outside of the summer months. Learn more by viewing the infographics that are now available.",
"type": "NarrativeText",
"metadata": {
"page_number": 1
}
},
{
"element_id": "fa1b939ef6159d95260bc095f58ebbc2",
"text": "Stay safe this spring, and every season, by being informed, prepared, and Weather-Ready.",
"type": "NarrativeText",
"metadata": {
"page_number": 1
}
},
{
"element_id": "47d5d0d27a35a36d7467dfc8b6e089b3",
"text": "US Dept of Commerce\n National Oceanic and Atmospheric Administration\n National Weather Service\n News Around NOAA1325 East West HighwaySilver Spring, MD 20910Comments? Questions? Please Contact Us.",
"type": "NarrativeText",
"metadata": {
"page_number": 1
}
},
{
"element_id": "129c678fce59acee7ac6a6fdb67b6310",
"text": "Disclaimer",
"type": "Title",
"metadata": {
"page_number": 1
}
},
{
"element_id": "3c96caaebd949e39d25b3ccf4133c5d8",
"text": "Information Quality",
"type": "Title",
"metadata": {
"page_number": 1
}
},
{
"element_id": "b79cac926e0b2e347e72cc91d5174037",
"text": "Help",
"type": "Title",
"metadata": {
"page_number": 1
}
},
{
"element_id": "4c4e436f9a453c776dbf011f98d932d6",
"text": "Glossary",
"type": "Title",
"metadata": {
"page_number": 1
}
},
{
"element_id": "506ff394621596dd88138642eddfc1e4",
"text": "Privacy Policy",
"type": "Title",
"metadata": {
"page_number": 1
}
},
{
"element_id": "c70ae8c30a61c450d2c5148d1b6a0447",
"text": "Freedom of Information Act (FOIA)",
"type": "Title",
"metadata": {
"page_number": 1
}
},
{
"element_id": "5d8c71abc527284cd463aa58f3f48098",
"text": "About Us",
"type": "Title",
"metadata": {
"page_number": 1
}
},
{
"element_id": "a8a00c355d2fa1461d532a1088274f32",
"text": "Career Opportunities",
"type": "Title",
"metadata": {
"page_number": 1
}
}
]
14 changes: 10 additions & 4 deletions test_unstructured_api_tools/api/functions_and_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
FILE_TXT_1 = "test_unstructured_api_tools/api/fixtures/text_file.txt"
FILE_TXT_2 = "test_unstructured_api_tools/api/fixtures/text_file_2.txt"
FILE_MARKDOWN = "test_unstructured_api_tools/api/fixtures/markdown.md"
FILE_MSG = "test_unstructured_api_tools/api/fixtures/fake-email.msg"
FILE_JSON = "test_unstructured_api_tools/api/fixtures/spring-weather.html.json"

GZIP_FILE_DOCX = "test_unstructured_api_tools/api/fixtures/fake.docx.gz"
GZIP_FILE_IMAGE = "test_unstructured_api_tools/api/fixtures/example.jpg.gz"
Expand All @@ -19,6 +21,8 @@
FILE_TXT_2: 30,
GZIP_FILE_TXT_2: 30,
FILE_MARKDOWN: 91,
FILE_MSG: 11776,
FILE_JSON: 13151,
}
FILENAME_FORMATS = {
FILE_DOCX: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
Expand All @@ -30,6 +34,8 @@
GZIP_FILE_TXT_1: "application/gzip",
GZIP_FILE_TXT_2: "application/gzip",
FILE_MARKDOWN: "text/markdown",
FILE_MSG: "message/rfc822",
FILE_JSON: "application/json",
"octet_stream": "application/octet-stream",
}

Expand All @@ -49,28 +55,28 @@
RESPONSE_SCHEMA_LABELSTUDIO = {"output_schema": "labelstudio"}


def convert_files_for_api(files, is_another_md_mimetype=False):
def convert_files_for_api(files, use_octet_stream_type=False):
return [
(
"files",
(
test_file,
open(test_file, "rb"),
FILENAME_FORMATS["octet_stream" if is_another_md_mimetype else test_file],
FILENAME_FORMATS["octet_stream" if use_octet_stream_type else test_file],
),
)
for test_file in files
]


def convert_text_files_for_api(files, is_another_md_mimetype=False):
def convert_text_files_for_api(files, use_octet_stream_type=False):
return [
(
"text_files",
(
test_file,
open(test_file, "rb"),
FILENAME_FORMATS["octet_stream" if is_another_md_mimetype else test_file],
FILENAME_FORMATS["octet_stream" if use_octet_stream_type else test_file],
),
)
for test_file in files
Expand Down
Loading

0 comments on commit 2b043ab

Please sign in to comment.