Translating LaTex documents in different languages
up vote
0
down vote
favorite
I'm coming from C and C++ and have often troubles using pythons full potential. This is a script I wrote to translate LaTex documents in different languages. A parser replaces all latex specific syntax with hashes and then sends it all to deepl. The pydeepl wrapper is from Github.
What do you think of this code?
import sys
import re
import pydeepl
from random import choice
from tqdm import tqdm
def make_xlat(*args, **kwds):
adict = dict(*args, **kwds)
rx = re.compile('|'.join(map(re.escape, adict)))
def one_xlat(match):
return adict[match.group(0)]
def xlat(text):
return rx.sub(one_xlat, text)
return xlat
if __name__ == "__main__":
fileInputName = sys.argv[1]
#fileInputName = "Introduction.tex"
fileOutName = fileInputName.split('.')[0]+"_trans.tex"
fileIn = open(fileInputName, "r")
fileOut = open(fileOutName, "w")
fileStr = fileIn.read()
print("Starting hashing...")
#replace commands like begin{*}, end{*}, tabs etc. with hashes
searchObj1 = re.findall( r"\begin{w+}|t| |r|\end{w+}|\usepackage{w+}|\newcommand{w+}|\include{.*}|\input{w+}|\w+[.*}|%.*", fileStr)
#random number for every found command + a prefix which hopefully doens't appear in text. Used to skip lines later, which don't need translation
list1 = ['X#X' + str(choice(range(1111, 9999, 1))) for x in searchObj1]
#make a dictionary out of hashes
d1 = dict(zip(searchObj1,list1))
translate = make_xlat(d1)
hashedText = translate(fileStr)
#replace all latex commands (starting with a backslash) with hashes
searchObj2 = re.findall( r"\w+",hashedText)
#random number + prefix again
list2 = ['X#X' + str(choice(range(1111, 9999, 1))) for x in searchObj2]
#make a dictionary
d2 = dict(zip(searchObj2,list2))
translate = make_xlat(d2)
hashedText = translate(hashedText)
#print(hashedText)
#fileOut.write(translate(hashedText))
d1.update(d2) # combine dictionaries
#with open('hash_dict.json', 'w') as f:
#json.dump(d1, f)
print("Hashing done. Starting translation...")
translated = ''
for line in tqdm(hashedText.splitlines()): #tqdm is a progressbar
#print(line)
if line.startswith("X#X") and len(line) == 7:
translated = translated + line + 'n'
continue
if line == 'n':
translated = translated + 'n'
elif line == '':
translated = translated + 'n'
else:
translated = translated+pydeepl.translate(line, "DE", "EN")+'n'
#translated = translated+pydeepl.translate(hashedText, "DE", "EN")
#print(translated)
d1Inv = {val:key for (key, val) in d1.items()} #swap dictionary
translate2 = make_xlat(d1Inv)
fileStrOut = translate2(translated)
#print(fileStrOut)
fileOut.write(fileStrOut)
print("success")
fileIn.close()
fileOut.close()
python google-translate
New contributor
add a comment |
up vote
0
down vote
favorite
I'm coming from C and C++ and have often troubles using pythons full potential. This is a script I wrote to translate LaTex documents in different languages. A parser replaces all latex specific syntax with hashes and then sends it all to deepl. The pydeepl wrapper is from Github.
What do you think of this code?
import sys
import re
import pydeepl
from random import choice
from tqdm import tqdm
def make_xlat(*args, **kwds):
adict = dict(*args, **kwds)
rx = re.compile('|'.join(map(re.escape, adict)))
def one_xlat(match):
return adict[match.group(0)]
def xlat(text):
return rx.sub(one_xlat, text)
return xlat
if __name__ == "__main__":
fileInputName = sys.argv[1]
#fileInputName = "Introduction.tex"
fileOutName = fileInputName.split('.')[0]+"_trans.tex"
fileIn = open(fileInputName, "r")
fileOut = open(fileOutName, "w")
fileStr = fileIn.read()
print("Starting hashing...")
#replace commands like begin{*}, end{*}, tabs etc. with hashes
searchObj1 = re.findall( r"\begin{w+}|t| |r|\end{w+}|\usepackage{w+}|\newcommand{w+}|\include{.*}|\input{w+}|\w+[.*}|%.*", fileStr)
#random number for every found command + a prefix which hopefully doens't appear in text. Used to skip lines later, which don't need translation
list1 = ['X#X' + str(choice(range(1111, 9999, 1))) for x in searchObj1]
#make a dictionary out of hashes
d1 = dict(zip(searchObj1,list1))
translate = make_xlat(d1)
hashedText = translate(fileStr)
#replace all latex commands (starting with a backslash) with hashes
searchObj2 = re.findall( r"\w+",hashedText)
#random number + prefix again
list2 = ['X#X' + str(choice(range(1111, 9999, 1))) for x in searchObj2]
#make a dictionary
d2 = dict(zip(searchObj2,list2))
translate = make_xlat(d2)
hashedText = translate(hashedText)
#print(hashedText)
#fileOut.write(translate(hashedText))
d1.update(d2) # combine dictionaries
#with open('hash_dict.json', 'w') as f:
#json.dump(d1, f)
print("Hashing done. Starting translation...")
translated = ''
for line in tqdm(hashedText.splitlines()): #tqdm is a progressbar
#print(line)
if line.startswith("X#X") and len(line) == 7:
translated = translated + line + 'n'
continue
if line == 'n':
translated = translated + 'n'
elif line == '':
translated = translated + 'n'
else:
translated = translated+pydeepl.translate(line, "DE", "EN")+'n'
#translated = translated+pydeepl.translate(hashedText, "DE", "EN")
#print(translated)
d1Inv = {val:key for (key, val) in d1.items()} #swap dictionary
translate2 = make_xlat(d1Inv)
fileStrOut = translate2(translated)
#print(fileStrOut)
fileOut.write(fileStrOut)
print("success")
fileIn.close()
fileOut.close()
python google-translate
New contributor
add a comment |
up vote
0
down vote
favorite
up vote
0
down vote
favorite
I'm coming from C and C++ and have often troubles using pythons full potential. This is a script I wrote to translate LaTex documents in different languages. A parser replaces all latex specific syntax with hashes and then sends it all to deepl. The pydeepl wrapper is from Github.
What do you think of this code?
import sys
import re
import pydeepl
from random import choice
from tqdm import tqdm
def make_xlat(*args, **kwds):
adict = dict(*args, **kwds)
rx = re.compile('|'.join(map(re.escape, adict)))
def one_xlat(match):
return adict[match.group(0)]
def xlat(text):
return rx.sub(one_xlat, text)
return xlat
if __name__ == "__main__":
fileInputName = sys.argv[1]
#fileInputName = "Introduction.tex"
fileOutName = fileInputName.split('.')[0]+"_trans.tex"
fileIn = open(fileInputName, "r")
fileOut = open(fileOutName, "w")
fileStr = fileIn.read()
print("Starting hashing...")
#replace commands like begin{*}, end{*}, tabs etc. with hashes
searchObj1 = re.findall( r"\begin{w+}|t| |r|\end{w+}|\usepackage{w+}|\newcommand{w+}|\include{.*}|\input{w+}|\w+[.*}|%.*", fileStr)
#random number for every found command + a prefix which hopefully doens't appear in text. Used to skip lines later, which don't need translation
list1 = ['X#X' + str(choice(range(1111, 9999, 1))) for x in searchObj1]
#make a dictionary out of hashes
d1 = dict(zip(searchObj1,list1))
translate = make_xlat(d1)
hashedText = translate(fileStr)
#replace all latex commands (starting with a backslash) with hashes
searchObj2 = re.findall( r"\w+",hashedText)
#random number + prefix again
list2 = ['X#X' + str(choice(range(1111, 9999, 1))) for x in searchObj2]
#make a dictionary
d2 = dict(zip(searchObj2,list2))
translate = make_xlat(d2)
hashedText = translate(hashedText)
#print(hashedText)
#fileOut.write(translate(hashedText))
d1.update(d2) # combine dictionaries
#with open('hash_dict.json', 'w') as f:
#json.dump(d1, f)
print("Hashing done. Starting translation...")
translated = ''
for line in tqdm(hashedText.splitlines()): #tqdm is a progressbar
#print(line)
if line.startswith("X#X") and len(line) == 7:
translated = translated + line + 'n'
continue
if line == 'n':
translated = translated + 'n'
elif line == '':
translated = translated + 'n'
else:
translated = translated+pydeepl.translate(line, "DE", "EN")+'n'
#translated = translated+pydeepl.translate(hashedText, "DE", "EN")
#print(translated)
d1Inv = {val:key for (key, val) in d1.items()} #swap dictionary
translate2 = make_xlat(d1Inv)
fileStrOut = translate2(translated)
#print(fileStrOut)
fileOut.write(fileStrOut)
print("success")
fileIn.close()
fileOut.close()
python google-translate
New contributor
I'm coming from C and C++ and have often troubles using pythons full potential. This is a script I wrote to translate LaTex documents in different languages. A parser replaces all latex specific syntax with hashes and then sends it all to deepl. The pydeepl wrapper is from Github.
What do you think of this code?
import sys
import re
import pydeepl
from random import choice
from tqdm import tqdm
def make_xlat(*args, **kwds):
adict = dict(*args, **kwds)
rx = re.compile('|'.join(map(re.escape, adict)))
def one_xlat(match):
return adict[match.group(0)]
def xlat(text):
return rx.sub(one_xlat, text)
return xlat
if __name__ == "__main__":
fileInputName = sys.argv[1]
#fileInputName = "Introduction.tex"
fileOutName = fileInputName.split('.')[0]+"_trans.tex"
fileIn = open(fileInputName, "r")
fileOut = open(fileOutName, "w")
fileStr = fileIn.read()
print("Starting hashing...")
#replace commands like begin{*}, end{*}, tabs etc. with hashes
searchObj1 = re.findall( r"\begin{w+}|t| |r|\end{w+}|\usepackage{w+}|\newcommand{w+}|\include{.*}|\input{w+}|\w+[.*}|%.*", fileStr)
#random number for every found command + a prefix which hopefully doens't appear in text. Used to skip lines later, which don't need translation
list1 = ['X#X' + str(choice(range(1111, 9999, 1))) for x in searchObj1]
#make a dictionary out of hashes
d1 = dict(zip(searchObj1,list1))
translate = make_xlat(d1)
hashedText = translate(fileStr)
#replace all latex commands (starting with a backslash) with hashes
searchObj2 = re.findall( r"\w+",hashedText)
#random number + prefix again
list2 = ['X#X' + str(choice(range(1111, 9999, 1))) for x in searchObj2]
#make a dictionary
d2 = dict(zip(searchObj2,list2))
translate = make_xlat(d2)
hashedText = translate(hashedText)
#print(hashedText)
#fileOut.write(translate(hashedText))
d1.update(d2) # combine dictionaries
#with open('hash_dict.json', 'w') as f:
#json.dump(d1, f)
print("Hashing done. Starting translation...")
translated = ''
for line in tqdm(hashedText.splitlines()): #tqdm is a progressbar
#print(line)
if line.startswith("X#X") and len(line) == 7:
translated = translated + line + 'n'
continue
if line == 'n':
translated = translated + 'n'
elif line == '':
translated = translated + 'n'
else:
translated = translated+pydeepl.translate(line, "DE", "EN")+'n'
#translated = translated+pydeepl.translate(hashedText, "DE", "EN")
#print(translated)
d1Inv = {val:key for (key, val) in d1.items()} #swap dictionary
translate2 = make_xlat(d1Inv)
fileStrOut = translate2(translated)
#print(fileStrOut)
fileOut.write(fileStrOut)
print("success")
fileIn.close()
fileOut.close()
python google-translate
python google-translate
New contributor
New contributor
edited 5 mins ago
Jamal♦
30.2k11115226
30.2k11115226
New contributor
asked 41 mins ago
Mr.Sh4nnon
11
11
New contributor
New contributor
add a comment |
add a comment |
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
Mr.Sh4nnon is a new contributor. Be nice, and check out our Code of Conduct.
Mr.Sh4nnon is a new contributor. Be nice, and check out our Code of Conduct.
Mr.Sh4nnon is a new contributor. Be nice, and check out our Code of Conduct.
Mr.Sh4nnon is a new contributor. Be nice, and check out our Code of Conduct.
Thanks for contributing an answer to Code Review Stack Exchange!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
Use MathJax to format equations. MathJax reference.
To learn more, see our tips on writing great answers.
Some of your past answers have not been well-received, and you're in danger of being blocked from answering.
Please pay close attention to the following guidance:
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f209049%2ftranslating-latex-documents-in-different-languages%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown