# jhb2345/eng-rus-vowel
# Mirror of https://github.com/jibby0/eng-rus-vowel
#
# Author: Josh Bicking

from praatio import tgio
import sys
from os.path import join
import numpy
import subprocess

# Exactly three positional arguments are required: FOLDER, LANG and TIER-NAME.
if len(sys.argv) != 4:
    # Usage goes to stderr so that stdout carries nothing but the CSV results.
    print("Usage: {} FOLDER LANG TIER-NAME".format(sys.argv[0]), file=sys.stderr)
    print("FOLDER contains the 15 annotated voice samples.", file=sys.stderr)
    print("LANG is the language (the prefix of the sound/TextGrid files, english or russian).",
          file=sys.stderr)
    print("TIER-NAME is the name of the ipa tier. Either 'ipa' or 'IPA-phones'.",
          file=sys.stderr)
    # sys.exit is always available; the bare exit() helper is only injected by
    # the site module for interactive use.
    sys.exit(1)

# Directory holding the .wav / .TextGrid pairs, the file-name prefix, and the
# name of the tier whose intervals are analysed.
folder, langname, tiername = sys.argv[1:4]

# Stop consonants that can open a consonant+vowel pair of interest
stop_consonants = ["t", "d", "k", "g", "p", "b", "ʔ"]

# IPA vowel symbols
vowels = ["i","y","ɨ","ʉ","ɯ","u","ɪ","ʏ","ɪ̈","ʊ̈",
          "ʊ","e","ø","ɘ","ɵ","ɤ","o","e̞","ø̞",
          "ə","ɤ̞","o̞","ɛ","œ","ɜ","ɞ","ʌ","ɔ",
          "æ","ɐ","a","ɶ","ä","ɑ","ɒ"]

# Every VOT measurement found so far, grouped per consonant/vowel pair
vot = {}

# Not referenced anywhere in the visible part of this script
russian = []

# Entry list of the requested tier for each of the TextGrid files numbered
# 1 through 15, stored in file order.
lang = [
    tgio.openTextgrid(
        join(folder, "{}{}.TextGrid".format(langname, number))
    ).tierDict[tiername].entryList
    for number in range(1, 16)
]

# Spacing, in seconds, between consecutive pitch samples requested from Praat.
PITCH_STEP = .001

# Text Praat prints for a pitch sample that has no defined value (no voicing).
UNDEFINED_PITCH = "--undefined--"


def _count_leading_unvoiced(samples):
    """Return how many samples at the start of *samples* are undefined pitch."""
    count = 0
    for sample in samples:
        if sample.strip() != UNDEFINED_PITCH:
            break
        count += 1
    return count


# The generated Praat script is rewritten at this path for every measurement.
script_path = join(folder, "tempscript.praat")

for i, entries in enumerate(lang):
    # TextGrid number i+1 is paired with the sound file of the same number.
    wav_path = join(folder, '{}{}.wav'.format(langname, i + 1))

    # Walk over every pair of neighbouring intervals in the tier.
    for stop, following in zip(entries, entries[1:]):
        if stop.label == "" or following.label == "":
            continue  # One of the two intervals is silence
        if stop.label[0] not in stop_consonants:
            continue  # First interval is not a stop consonant
        if following.label[0] not in vowels:
            continue  # Second interval does not begin with a vowel

        # We're estimating VOT as "the time between where the
        # consonant Interval of the TextGrid ends, and the
        # beginning of pitch (voicing) starts".
        start = stop.start
        end = following.end
        zero = stop.end

        # Build a Praat script that prints the pitch every PITCH_STEP
        # seconds from the start of the consonant to the end of the vowel.
        script = [
            'Read from file: "{}"'.format(wav_path),
            'To Pitch: 0, 75, 600']
        for k in numpy.arange(start, end, PITCH_STEP):
            script.append('p = Get value at time: {:.3f}, "Hertz", "Linear"'.format(k))
            script.append('appendInfoLine: p')
        script.append('')

        with open(script_path, "w") as f:
            f.write("\n".join(script))

        completed = subprocess.run(["praat", script_path], stdout=subprocess.PIPE)

        # Decode the captured bytes and split on real line breaks so that the
        # list holds exactly one item per pitch sample.
        samples = completed.stdout.decode("utf-8", errors="replace").splitlines()
        unvoiced = _count_leading_unvoiced(samples)

        # Skip pairs that are voiced from the very first sample, and pairs in
        # which no voicing was detected at all (this also covers empty output).
        if unvoiced == 0 or unvoiced == len(samples):
            continue

        # Convert the sample count into seconds before measuring the onset
        # of voicing relative to the end of the consonant interval.
        results = (start + unvoiced * PITCH_STEP) - zero

        # Check for palatalization
        consonant = stop.label[0]
        if "ʲ" in stop.label:
            consonant += "ʲ"

        # Key on a (consonant, vowel) tuple so that a two-character
        # palatalized consonant stays separate from its vowel.
        vot.setdefault((consonant, following.label[0]), []).append(str(results))

# One CSV row per pair: consonant, vowel, then every VOT measured for it.
for consonant, vowel in sorted(vot):
    print("{},{},{}".format(consonant, vowel, ",".join(vot[(consonant, vowel)])))