Next Spaceship

Driving into the future…

Check if a String Contains Chinese Characters

| Comments

Convert the string to Unicode. Since the common Chinese characters (the CJK Unified Ideographs block) lie between U+4E00 and U+9FFF, just check whether any character of the string falls in this range.

Check If a String Contains Chinese Characters
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#coding=utf-8
#!/usr/bin/python

#This code is intended for Python 2.7

#Copy this file into the folder whose files
#you want to extract Chinese text from

#Created by Leon on July, 7, 2011
#http://leons.im

import sys, os, codecs, glob, re

# Report file that record() appends to; entries are written GBK-encoded.
fout = codecs.open("out.txt", mode="wb", encoding="gbk")
# Sequence number given to the next report entry; record() increments it.
num = 1
def record(bn, ln, line):
    """Log `line` to the report if it contains a Chinese character.

    A character counts as Chinese when its code point lies in the range
    0x4e00..0x9fff.  On a hit the text is echoed to stdout, one
    "num:bn:ln:line" entry terminated by CRLF is written to the global
    `fout`, and the global counter `num` is advanced by one.  Text with
    no such character is ignored.

    bn   -- file base name stored in the entry
    ln   -- line number stored in the entry
    line -- text to examine and, on a hit, to store
    """
    global num
    has_chinese = any(0x4e00 <= ord(ch) <= 0x9fff for ch in line)
    if not has_chinese:
        return
    print(line)
    fout.write('%d:%s:%d:%s\r\n' % (num, bn, ln, line))
    num += 1

# A line whose first non-blank characters open a // comment.
_COMMENT_LINE = re.compile(r'^\s*//')
# _T("...") string macro; group 1 is the text between the quotes.
_T_LITERAL = re.compile(r'_T\("([^)]*)"\)')
# L"..." wide string literal; group 1 is the text between the quotes.
_WIDE_LITERAL = re.compile(r'L"([^"]*)"')


def transcode(infile):
    """Scan one GBK-encoded file for string literals and pass them to record().

    Lines that start with a // comment (after optional whitespace) are
    skipped.  On every other line the first non-empty _T("...") literal
    and the first non-empty L"..." literal are each handed to record(),
    together with the file's base name and the 1-based line number.

    infile -- path of the file to scan

    Any error raised while reading or recording (for example bytes that
    are not valid GBK) is reported as "error." and abandons this file
    only; the file handle is closed in every case.
    """
    print("infile = " + infile)

    bn = os.path.basename(infile)
    fin = codecs.open(infile, "rb", "gbk")
    try:
        # enumerate() numbers every physical line, so skipped comment
        # lines still advance the reported line number.
        for ln, line in enumerate(fin.readlines(), 1):
            if _COMMENT_LINE.search(line):
                continue
            for pattern in (_T_LITERAL, _WIDE_LITERAL):
                m = pattern.search(line)
                if m and m.group(1):
                    record(bn, ln, m.group(0))
    except Exception:
        # Best effort: give up on this file but keep the overall scan
        # going.  KeyboardInterrupt/SystemExit are deliberately not caught.
        print("error.")
    finally:
        fin.close()

# Scan every source-like file below the directory that holds this script.
path = os.path.abspath(os.path.dirname(sys.argv[0]))
print("Current Path: " + path)

# Literal file-name suffixes to scan (case-sensitive).  A plain suffix test
# avoids the regex pitfall where an unescaped "." matches any character.
source_suffixes = (".h", ".m", ".mm", ".cpp", ".inl", ".def", ".txt")
# The report is opened as "out.txt" relative to the working directory; it
# must not be scanned, or its own entries would be recorded again.
report_path = os.path.normcase(os.path.abspath("out.txt"))

try:
    for dirpath, dirs, files in os.walk(path):
        for filename in files:
            if not filename.endswith(source_suffixes):
                continue
            full_path = os.path.join(dirpath, filename)
            if os.path.normcase(os.path.abspath(full_path)) == report_path:
                continue
            print("----" + filename + "....")
            transcode(full_path)
            print("Done.")
finally:
    # Close (and thereby flush) the report even if the walk is interrupted.
    fout.close()
print("")
print("Created for you by Leon on July, 7, 2011.")
# Wait for Enter before the script exits.
raw_input()

Comments