python - the JSON object must be str, bytes or bytearray, not NoneType

Collectives™ on Stack Overflow

Find centralized, trusted content and collaborate around the technologies you use most.
Learn more about Collectives
Teams
Q&A for work
Connect and share knowledge within a single location that is structured and easy to search.
Learn more about Teams
This program takes HTML files from an input directory and translates them to Hindi using googletrans.
import os
from bs4 import BeautifulSoup
from googletrans import Translator
# Set the input and output directories
input_dir = r"C:\My Web Sites\CC\www.classcentral.com\subject"
output_dir = r"C:\My Web Sites\CC\www.classcentral.com\translated\subject"
# Create the output directory if it doesn't exist
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
# Create the translator object
translator = Translator(service_urls=['translate.google.com'])
# Iterate through all HTML files in the input directory
for filename in os.listdir(input_dir):
    if filename.endswith('.html'):
        # Read in the input file
        with open(os.path.join(input_dir, filename), 'r', encoding='latin-1') as f:
            # Parse the HTML using BeautifulSoup
            soup = BeautifulSoup(f, 'html.parser')
            # Translate the text in the HTML
            for element in soup.find_all(text=True):
                if element.strip():  # Skip empty strings
                    # A failed translation of one text node is reported and
                    # the loop moves on to the next node.
                    try:
                        translated_text = translator.translate(element.string, dest='hi').text
                        element.string.replace_with(translated_text)
                    except:
                        print("Translation failed for element: ", element)
        # Write out the translated HTML to a new file in the output directory
        with open(os.path.join(output_dir, filename), 'w', encoding='latin-1') as f:
            f.write(str(soup))
            print(f"Translated file '{filename}' written to '{output_dir}'")
I am getting an error:
  File "e:\Webscraping\Translate1.py", line 36, in <module>
    translation = translator.translate(element.string, dest='hi')
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python311\Lib\site-packages\googletrans\client.py", line 219, in translate
    parsed = json.loads(data[0][2])
             ^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python311\Lib\json\__init__.py", line 339, in loads
    raise TypeError(f'the JSON object must be str, bytes or bytearray, '
TypeError: the JSON object must be str, bytes or bytearray, not NoneType
During the handling of the above exception, another exception occurred:
Traceback (most recent call last):
  File "e:\Webscraping\Translate1.py", line 44, in <module>
    print("Translation failed for element: ", element)
  File "C:\Python311\Lib\encodings\cp1252.py", line 19, in encode
    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode character '\u2192' in position 178878: character maps to <undefined>
I cannot pinpoint the reason behind the error. Does someone know the fix? For the 2nd error I have tried UTF-8, UTF-16, and UTF-32 as well as latin-1, but it still gives the same error.
                Based on that, maybe try element.encode("utf-8") instead of element in your print statement?
– slothrop
                Feb 19 at 11:34
                regarding the 1st error: github.com/ssut/py-googletrans/issues/301 suggests that this happens when the content to be translated is too long
– slothrop
                Feb 19 at 11:48
So I changed the code a bit. To solve the encoding error I used chardet to detect the encoding of the file and then decoded the file's contents with the detected encoding.
Here's the code:
import logging
import os

import chardet
from bs4 import BeautifulSoup, NavigableString
from googletrans import Translator
# Set up logging
logging.basicConfig(filename='translation.log', level=logging.DEBUG)
# Set the input and output directories
input_dir = r"C:\My Web Sites\CC\www.classcentral.com\institution"
output_dir = r"C:\My Web Sites\CC\www.classcentral.com\translated\institution"
# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)
# Create the translator object
translator = Translator(service_urls=['translate.google.com'])
# Iterate through all HTML files in the input directory
for filename in os.listdir(input_dir):
    if not filename.endswith('.html'):
        continue
    # Read the raw bytes once; the same buffer is used for both encoding
    # detection and decoding.
    with open(os.path.join(input_dir, filename), 'rb') as f:
        raw = f.read()
    # chardet reports None when it cannot guess (e.g. an empty file), so
    # fall back to UTF-8 rather than passing None to decode().
    encoding = chardet.detect(raw)['encoding'] or 'utf-8'
    # The detected encoding is only a guess; replace undecodable bytes
    # instead of aborting the whole batch on one bad file.
    text = raw.decode(encoding, errors='replace')
    soup = BeautifulSoup(text, 'html.parser')
    # Translate the text in the HTML
    for element in soup.find_all(string=True):
        # Only plain text nodes are translated. Comments, the doctype, CDATA
        # and similar nodes are NavigableString subclasses; replacing them
        # with translated text would corrupt the document.
        if type(element) is not NavigableString:
            continue
        # JavaScript and CSS are code, not prose, and are often long enough
        # to make the translation request fail.
        if element.parent is not None and element.parent.name in ('script', 'style'):
            continue
        if not element.strip():  # Skip empty strings
            continue
        # One failed node is logged and skipped so the rest of the page is
        # still translated.
        try:
            translated_text = translator.translate(str(element), dest='hi').text
            element.replace_with(translated_text)
        except Exception as e:
            logging.error("Translation failed for element: %s with error: %s", element, e)
    # Write out the translated HTML to a new file in the output directory
    with open(os.path.join(output_dir, filename), 'w', encoding='utf-8') as f:
        f.write(str(soup))
    logging.info("Translated file '%s' written to '%s'", filename, output_dir)
        Thanks for contributing an answer to Stack Overflow!
Please be sure to answer the question. Provide details and share your research!
But avoid …
Asking for help, clarification, or responding to other answers.
Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.