8 years ago · 33b78519f9
--- a/README.md
+++ b/README.md
@@ -0,0 +1,5 @@
 
				+This repository contains assorted data from our paper, *Vowel Comparison Between Native English and Russian English Speakers*.
			
 
				+
			
 
				+VOT analysis and English TextGrid annotation were done by Josh Bicking. 
			
 
				+
			
 
				+Vowel length, formant extraction, and Russian TextGrid annotation were done by Tom Thomas.
			
--- a/textgrids/english1.TextGrid
+++ b/textgrids/english1.TextGrid
--- a/textgrids/english10.TextGrid
+++ b/textgrids/english10.TextGrid
--- a/textgrids/english11.TextGrid
+++ b/textgrids/english11.TextGrid
--- a/textgrids/english12.TextGrid
+++ b/textgrids/english12.TextGrid
--- a/textgrids/english13.TextGrid
+++ b/textgrids/english13.TextGrid
--- a/textgrids/english14.TextGrid
+++ b/textgrids/english14.TextGrid
--- a/textgrids/english15.TextGrid
+++ b/textgrids/english15.TextGrid
--- a/textgrids/english2.TextGrid
+++ b/textgrids/english2.TextGrid
--- a/textgrids/english3.TextGrid
+++ b/textgrids/english3.TextGrid
--- a/textgrids/english4.TextGrid
+++ b/textgrids/english4.TextGrid
--- a/textgrids/english5.TextGrid
+++ b/textgrids/english5.TextGrid
--- a/textgrids/english6.TextGrid
+++ b/textgrids/english6.TextGrid
--- a/textgrids/english7.TextGrid
+++ b/textgrids/english7.TextGrid
--- a/textgrids/english8.TextGrid
+++ b/textgrids/english8.TextGrid
--- a/textgrids/english9.TextGrid
+++ b/textgrids/english9.TextGrid
--- a/textgrids/russian1.TextGrid
+++ b/textgrids/russian1.TextGrid
--- a/textgrids/russian10.TextGrid
+++ b/textgrids/russian10.TextGrid
--- a/textgrids/russian11.TextGrid
+++ b/textgrids/russian11.TextGrid
--- a/textgrids/russian12.TextGrid
+++ b/textgrids/russian12.TextGrid
--- a/textgrids/russian13.TextGrid
+++ b/textgrids/russian13.TextGrid
--- a/textgrids/russian14.TextGrid
+++ b/textgrids/russian14.TextGrid
--- a/textgrids/russian15.TextGrid
+++ b/textgrids/russian15.TextGrid
--- a/textgrids/russian2.TextGrid
+++ b/textgrids/russian2.TextGrid
--- a/textgrids/russian3.TextGrid
+++ b/textgrids/russian3.TextGrid
--- a/textgrids/russian4.TextGrid
+++ b/textgrids/russian4.TextGrid
--- a/textgrids/russian5.TextGrid
+++ b/textgrids/russian5.TextGrid
--- a/textgrids/russian6.TextGrid
+++ b/textgrids/russian6.TextGrid
--- a/textgrids/russian7.TextGrid
+++ b/textgrids/russian7.TextGrid
--- a/textgrids/russian8.TextGrid
+++ b/textgrids/russian8.TextGrid
--- a/textgrids/russian9.TextGrid
+++ b/textgrids/russian9.TextGrid
--- a/vot-palatalization.py
+++ b/vot-palatalization.py
@@ -0,0 +1,99 @@
 
				+# Author: Josh Bicking
			
 
				+
			
 
				+from praatio import tgio
			
 
				+import sys
			
 
				+from os.path import join
			
 
				+import numpy
			
 
				+import subprocess
			
 
				+
			
 
				+if len(sys.argv) != 4:
			
 
				+    print("Usage: {} FOLDER LANG TIER-NAME".format(sys.argv[0]))
			
 
				+    print("FOLDER contains the 15 annotated voice samples.")
			
 
				+    print("LANG is the language (the prefix of the sound/TextGrid files, english or russian).")
			
 
				+    print("TIER-NAME is the name of the ipa tier. Either 'ipa' or 'IPA-phones'.")
			
 
				+    exit(1)
			
 
				+
			
 
				+folder = sys.argv[1]
			
 
				+langname = sys.argv[2]
			
 
				+tiername = sys.argv[3]
			
 
				+
			
 
				+# list of vowels
			
 
				+vowels = ["i","y","ɨ","ʉ","ɯ","u","ɪ","ʏ","ɪ̈","ʊ̈",
			
 
				+          "ʊ","e","ø","ɘ","ɵ","ɤ","o","e̞","ø̞",
			
 
				+          "ə","ɤ̞","o̞","ɛ","œ","ɜ","ɞ","ʌ","ɔ",
			
 
				+          "æ","ɐ","a","ɶ","ä","ɑ","ɒ"]
			
 
				+
			
 
				+# List of all VOTs discovered
			
 
				+vot = {}
			
 
				+
			
 
				+# Open all Files
			
 
				+lang = []
			
 
				+russian = []
			
 
				+
			
 
				+for i in range(1,16):
			
 
				+    lang.append(tgio.openTextgrid(join(folder, "{}{}.TextGrid".format(langname, i))).tierDict[tiername].entryList)
			
 
				+
			
 
				+
			
 
				+# Find all stop consonants with a vowel after them
			
 
				+stop_consonants = ["t", "d", "k", "g", "p", "b", "ʔ"]
			
 
				+
			
 
				+for i in range(0,15):
			
 
				+    for j in range(0, len(lang[i])):
			
 
				+        if (lang[i][j].label != ""                      # Not silence
			
 
				+            and lang[i][j].label[0] in stop_consonants  # Is a stop consonant
			
 
				+            and j + 1 < len(lang[i])                    # Has an entry after it
			
 
				+            and lang[i][j+1].label != ""                 # Not silence either
			
 
				+            and lang[i][j+1].label[0] in vowels):       # Is a vowel or dipthong
			
 
				+            # Build a script to find where pitch stops
			
 
				+
			
 
				+            # We're estimating VOT as "the time between where the
			
 
				+            # consonant Interval of the TextGrid ends, and the
			
 
				+            # beginning of pitch (voicing) starts".
			
 
				+            start = lang[i][j].start
			
 
				+            end = lang[i][j+1].end
			
 
				+            zero = lang[i][j].end
			
 
				+
			
 
				+            script = [
			
 
				+                'Read from file: "{}"'.format(join(folder, '{}{}.wav'.format(langname, i+1))),
			
 
				+                'To Pitch: 0, 75, 600']
			
 
				+
			
 
				+            for k in numpy.arange(start, end, .001):
			
 
				+                script.append('p = Get value at time: {:.3f}, "Hertz", "Linear"'.format(k))
			
 
				+                script.append('appendInfoLine: p')
			
 
				+
			
 
				+            script.append('')
			
 
				+
			
 
				+            script = "\n".join(script)
			
 
				+
			
 
				+            f = open(join(folder, "tempscript.praat"), "w")
			
 
				+            f.write(script)
			
 
				+            f.close()
			
 
				+
			
 
				+            s = subprocess.run(["praat", join(folder, "tempscript.praat")], stdout=subprocess.PIPE)
			
 
				+
			
 
				+            s = str(s.stdout)[2:].split("\\n")
			
 
				+            pitch_start = 0
			
 
				+            for line in s:
			
 
				+                if line == "--undefined--":
			
 
				+                    pitch_start += 1
			
 
				+                else:
			
 
				+                    break
			
 
				+
			
 
				+            if not (pitch_start == 0 or pitch_start == len(s)):
			
 
				+                # Find where the pitch starts, relative to the sound file
			
 
				+                results = (start + pitch_start) - zero
			
 
				+
			
 
				+                # Check for palatalization
			
 
				+                if "ʲ" in lang[i][j].label:
			
 
				+                    consonant = lang[i][j].label[0] + "ʲ"
			
 
				+                else:
			
 
				+                    consonant = lang[i][j].label[0]
			
 
				+                if consonant + lang[i][j+1].label[0] not in vot:
			
 
				+                    vot[consonant + lang[i][j+1].label[0]] = []
			
 
				+                vot[consonant + lang[i][j+1].label[0]].append(str(results))
			
 
				+                if consonant + lang[i][j+1].label[0] not in vot:
			
 
				+                    vot[consonant + lang[i][j+1].label[0]] = []
			
 
				+                vot[consonant + lang[i][j+1].label[0]].append(str(results))
			
 
				+
			
 
				+for key in sorted(vot):
			
 
				+    print("{},{},{}".format(key[0], key[1], ",".join(vot[key])))
			
--- a/vot.py
+++ b/vot.py
@@ -0,0 +1,90 @@
 
				+# Author: Josh Bicking
			
 
				+
			
 
				+from praatio import tgio
			
 
				+import sys
			
 
				+from os.path import join
			
 
				+import numpy
			
 
				+import subprocess
			
 
				+
			
 
				+if len(sys.argv) != 4:
			
 
				+    print("Usage: {} FOLDER LANG TIER-NAME".format(sys.argv[0]))
			
 
				+    print("FOLDER contains the 15 annotated voice samples.")
			
 
				+    print("LANG is the language (the prefix of the sound/TextGrid files, english or russian).")
			
 
				+    print("TIER-NAME is the name of the ipa tier. Either 'ipa' or 'IPA-phones'.")
			
 
				+    exit(1)
			
 
				+
			
 
				+folder = sys.argv[1]
			
 
				+langname = sys.argv[2]
			
 
				+tiername = sys.argv[3]
			
 
				+
			
 
				+# list of vowels
			
 
				+vowels = ["i","y","ɨ","ʉ","ɯ","u","ɪ","ʏ","ɪ̈","ʊ̈",
			
 
				+          "ʊ","e","ø","ɘ","ɵ","ɤ","o","e̞","ø̞",
			
 
				+          "ə","ɤ̞","o̞","ɛ","œ","ɜ","ɞ","ʌ","ɔ",
			
 
				+          "æ","ɐ","a","ɶ","ä","ɑ","ɒ"]
			
 
				+
			
 
				+# List of all VOTs discovered
			
 
				+vot = {}
			
 
				+
			
 
				+# Open all Files
			
 
				+lang = []
			
 
				+russian = []
			
 
				+
			
 
				+for i in range(1,16):
			
 
				+    lang.append(tgio.openTextgrid(join(folder, "{}{}.TextGrid".format(langname, i))).tierDict[tiername].entryList)
			
 
				+
			
 
				+
			
 
				+# Find all stop consonants with a vowel after them
			
 
				+stop_consonants = ["t", "d", "k", "g", "p", "b", "ʔ"]
			
 
				+
			
 
				+for i in range(0,15):
			
 
				+    for j in range(0, len(lang[i])):
			
 
				+        if (lang[i][j].label != ""                      # Not silence
			
 
				+            and lang[i][j].label[0] in stop_consonants  # Is a stop consonant
			
 
				+            and j + 1 < len(lang[i])                    # Has an entry after it
			
 
				+            and lang[i][j+1].label != ""                 # Not silence either
			
 
				+            and lang[i][j+1].label[0] in vowels):       # Is a vowel or dipthong
			
 
				+            # Build a script to find where pitch stops
			
 
				+
			
 
				+            # We're estimating VOT as "the time between where the
			
 
				+            # consonant Interval of the TextGrid ends, and the
			
 
				+            # beginning of pitch (voicing) starts".
			
 
				+            start = lang[i][j].start
			
 
				+            end = lang[i][j+1].end
			
 
				+            zero = lang[i][j].end
			
 
				+
			
 
				+            script = [
			
 
				+                'Read from file: "{}"'.format(join(folder, '{}{}.wav'.format(langname, i+1))),
			
 
				+                'To Pitch: 0, 75, 600']
			
 
				+
			
 
				+            for k in numpy.arange(start, end, .001):
			
 
				+                script.append('p = Get value at time: {:.3f}, "Hertz", "Linear"'.format(k))
			
 
				+                script.append('appendInfoLine: p')
			
 
				+
			
 
				+            script.append('')
			
 
				+
			
 
				+            script = "\n".join(script)
			
 
				+
			
 
				+            f = open(join(folder, "tempscript.praat"), "w")
			
 
				+            f.write(script)
			
 
				+            f.close()
			
 
				+
			
 
				+            s = subprocess.run(["praat", join(folder, "tempscript.praat")], stdout=subprocess.PIPE)
			
 
				+
			
 
				+            s = str(s.stdout)[2:].split("\\n")
			
 
				+            pitch_start = 0
			
 
				+            for line in s:
			
 
				+                if line == "--undefined--":
			
 
				+                    pitch_start += 1
			
 
				+                else:
			
 
				+                    break
			
 
				+
			
 
				+            if not (pitch_start == 0 or pitch_start == len(s)):
			
 
				+                # Find where the pitch starts, relative to the sound file
			
 
				+                results = (start + pitch_start) - zero
			
 
				+                if lang[i][j].label[0] + lang[i][j+1].label[0] not in vot:
			
 
				+                    vot[lang[i][j].label[0] + lang[i][j+1].label[0]] = []
			
 
				+                vot[lang[i][j].label[0] + lang[i][j+1].label[0]].append(str(results))
			
 
				+
			
 
				+for key in sorted(vot):
			
 
				+    print("{},{},{}".format(key[0], key[1], ",".join(vot[key])))