试卷结构化接口文档

调用地址:https://ocrapi-subject.taobao.com/ocrservice/subject
请求方式:POST
返回类型:JSON

请求参数(Body):

{
//图像数据:base64编码,要求base64编码后大小不超过4M,最短边至少15px,最长边最大4096px,支持jpg/png/bmp格式;img与url参数二选一,不能同时传入
"img": "",
//图像url地址:图片完整URL,URL长度不超过1024字节,URL对应的图片base64编码后大小不超过4M,最短边至少15px,最长边最大4096px,支持jpg/png/bmp格式;url与img参数二选一,不能同时传入
"url": "",
//是否需要自动旋转功能,默认不需要。 true:需要 false:不需要
"rotate": false
}

请求代码示例:

java版

    /**
     * Sends one sample request to the exam-paper structuring OCR endpoint and
     * prints the response to standard output.
     *
     * <p>{@code HttpClientUtils} and {@code HttpExecuteResponse} are not JDK classes.
     * Download them from
     * https://gitee.com/duguangdemo/publicclouddemo/blob/master/src/main/java/util/HttpClientUtils.java
     * and
     * https://gitee.com/duguangdemo/publicclouddemo/blob/master/src/main/java/util/HttpExecuteResponse.java
     * and take the required dependencies from
     * https://gitee.com/duguangdemo/publicclouddemo/blob/master/pom.xml
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String url = "https://ocrapi-subject.taobao.com/ocrservice/subject";
        String appcode = "你自己的AppCode";

        HashMap<String, String> headers = new HashMap<String, String>();
        // Final header format (one ASCII space between the two parts):
        // Authorization:APPCODE 83359fd73fe94948385f570e3c139105
        headers.put("Authorization", "APPCODE " + appcode);
        // Content-Type required by the API.
        headers.put("Content-Type", "application/json; charset=UTF-8");

        // The body must be strict JSON: explanatory comments belong here in the
        // source, never inside the string that is sent to the server.
        //   img    - base64-encoded image data (local image)
        //   rotate - true to enable automatic rotation, false (default) otherwise
        // "img" and "url" are mutually exclusive. To send a network image, replace
        // the "img" field with "url" holding the full image URL.
        String bodys = "{\"img\":\"\",\"rotate\":false}";

        try {
            HttpExecuteResponse response = HttpClientUtils.doPost(url, bodys, headers);
            System.out.println(response.getResponseAsString());
            System.out.println(response.toString());
            // Uncomment to inspect the response headers when troubleshooting:
            // for (Object json : response.getHeaders()) {
            //     System.out.println(json);
            // }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

c#版

//using System.IO;
//using System.Text;
//using System.Net;
//using System.Net.Security;
//using System.Security.Cryptography.X509Certificates;

// Scheme and host of the OCR service.
private const string host = "https://ocrapi-subject.taobao.com";
// Resource path of the exam-paper structuring API.
private const string path = "/ocrservice/subject";
// HTTP verb used for the call.
private const string method = "POST";
// Placeholder credential; replace with your own AppCode.
private const string appcode = "你自己的AppCode";

/// <summary>
/// Posts one sample request to the exam-paper structuring OCR endpoint and writes
/// the response status, method, headers and body to the console.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    String querys = "";
    // The body must be strict JSON, so the field documentation lives here rather
    // than inside the string that is sent:
    //   img    - base64-encoded image data (local image)
    //   rotate - true to enable automatic rotation, false (default) otherwise
    // "img" and "url" are mutually exclusive. To send a network image, replace
    // the "img" field with "url" holding the full image URL.
    String bodys = "{\"img\":\"\",\"rotate\":false}";
    String url = host + path;

    if (0 < querys.Length)
    {
        url = url + "?" + querys;
    }

    // No custom certificate callback is installed: the platform's default TLS
    // validation stays active so the AppCode is only sent to a verified server.
    HttpWebRequest httpRequest = (HttpWebRequest)WebRequest.Create(url);
    httpRequest.Method = method;
    httpRequest.Headers.Add("Authorization", "APPCODE " + appcode);
    // Content-Type required by the API.
    httpRequest.ContentType = "application/json; charset=UTF-8";

    HttpWebResponse httpResponse = null;
    try
    {
        if (0 < bodys.Length)
        {
            byte[] data = Encoding.UTF8.GetBytes(bodys);
            using (Stream stream = httpRequest.GetRequestStream())
            {
                stream.Write(data, 0, data.Length);
            }
        }
        httpResponse = (HttpWebResponse)httpRequest.GetResponse();
    }
    catch (WebException ex)
    {
        // Non-2xx answers still carry a response worth printing. Transport
        // failures (DNS, TLS, timeout) have none, so report them and stop.
        httpResponse = ex.Response as HttpWebResponse;
        if (httpResponse == null)
        {
            Console.WriteLine(ex.Status + ": " + ex.Message);
            return;
        }
    }

    using (httpResponse)
    {
        Console.WriteLine(httpResponse.StatusCode);
        Console.WriteLine(httpResponse.Method);
        Console.WriteLine(httpResponse.Headers);
        using (Stream st = httpResponse.GetResponseStream())
        using (StreamReader reader = new StreamReader(st, Encoding.GetEncoding("utf-8")))
        {
            Console.WriteLine(reader.ReadToEnd());
        }
        Console.WriteLine("\n");
    }
}

/// <summary>
/// Server-certificate validation callback.
/// </summary>
/// <param name="sender">Object that raised the validation request (unused).</param>
/// <param name="certificate">Certificate presented by the server (unused).</param>
/// <param name="chain">Certificate chain built for the server certificate (unused).</param>
/// <param name="errors">Policy errors reported for the certificate.</param>
/// <returns>
/// true only when <paramref name="errors"/> is <c>SslPolicyErrors.None</c>.
/// Returning true unconditionally would accept any certificate and expose the
/// AppCode to man-in-the-middle interception.
/// </returns>
public static bool CheckValidationResult(object sender, X509Certificate certificate, X509Chain chain, SslPolicyErrors errors)
{
    return errors == SslPolicyErrors.None;
}

PHP版:

<?php
/**
 * Sample client for the exam-paper structuring OCR API.
 *
 * Posts a JSON body to the endpoint and dumps the raw response (headers and
 * body), or the cURL error message when the transfer itself fails.
 */
$host = "https://ocrapi-subject.taobao.com";
$path = "/ocrservice/subject";
$method = "POST";
$appcode = "你自己的AppCode";
$headers = array();
array_push($headers, "Authorization:APPCODE " . $appcode);
// Content-Type required by the API.
array_push($headers, "Content-Type".":"."application/json; charset=UTF-8");
// Query string; empty in this sample.
$querys = "";
// Build the body with json_encode so it is always strict JSON.
//   img    - base64-encoded image data (local image)
//   rotate - true to enable automatic rotation, false (default) otherwise
// "img" and "url" are mutually exclusive. To send a network image, replace
// the "img" entry with "url" holding the full image URL.
$bodys = json_encode(array(
    "img" => "",
    "rotate" => false
));
$url = $host . $path;

$curl = curl_init();
curl_setopt($curl, CURLOPT_CUSTOMREQUEST, $method);
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);
// Do not treat HTTP error statuses as transfer failures, so the error body
// returned by the service can still be shown.
curl_setopt($curl, CURLOPT_FAILONERROR, false);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
// Include the response headers in the returned string.
curl_setopt($curl, CURLOPT_HEADER, true);
// CURLOPT_SSL_VERIFYPEER / CURLOPT_SSL_VERIFYHOST are left at their secure
// defaults: turning them off would send the AppCode over an unverified
// TLS connection.
curl_setopt($curl, CURLOPT_POSTFIELDS, $bodys);

$response = curl_exec($curl);
if ($response === false) {
    // Transport-level failure (DNS, TLS, timeout, ...): show the reason.
    var_dump(curl_error($curl));
} else {
    var_dump($response);
}
?>

Python2:

import urllib, urllib2, sys
import json
import ssl


host = 'https://ocrapi-subject.taobao.com'
path = '/ocrservice/subject'
# Kept for reference only: urllib2 issues a POST because a request body is supplied.
method = 'POST'
appcode = '你自己的AppCode'
querys = ''
url = host + path

# Request fields. The body is serialized with json.dumps so it is always strict JSON.
#   img    - base64-encoded image data (local image)
#   rotate - True to enable automatic rotation, False (default) otherwise
# 'img' and 'url' are mutually exclusive. To send a network image, replace
# the 'img' entry with 'url' holding the full image URL.
bodys = {'img': '', 'rotate': False}
post_data = json.dumps(bodys)

request = urllib2.Request(url, post_data)
request.add_header('Authorization', 'APPCODE ' + appcode)
request.add_header('Content-Type', 'application/json; charset=UTF-8')

# Use the default context unchanged: certificate and hostname verification stay
# enabled so the AppCode is only sent to a verified server.
ctx = ssl.create_default_context()
try:
    response = urllib2.urlopen(request, context=ctx)
except urllib2.HTTPError as e:
    # The service reports failures as an HTTP error status with a JSON body.
    print(e.code)
    print(e.read())
else:
    try:
        content = response.read()
    finally:
        response.close()
    if content:
        print(content)

Python3:

import urllib.error
import urllib.request
import urllib.parse
import json
import time
import base64

# Request headers.
# Replace with your own AppCode; per the original note it can be obtained from
# the cloud-market order or the API gateway.
AppCode = "你自己的AppCode"
headers = {
    'Authorization': 'APPCODE ' + AppCode,
    'Content-Type': 'application/json; charset=UTF-8'
}


def posturl(url, data=None):
    """POST ``data`` as a JSON body to ``url`` and return the decoded response.

    Args:
        url: Endpoint to call.
        data: JSON-serializable mapping to send; ``None`` sends an empty object.

    Returns:
        The response body decoded as UTF-8, or ``None`` when the server answers
        with an HTTP error status (the status code and error body are printed).
    """
    # Serialize the argument that was passed in, not a module-level global.
    payload = json.dumps({} if data is None else data).encode(encoding='UTF8')
    req = urllib.request.Request(url, payload, headers)
    try:
        # The context manager closes the response even if read/decode fails.
        with urllib.request.urlopen(req) as r:
            return r.read().decode("utf8")
    except urllib.error.HTTPError as e:
        print(e.code)
        print(e.read().decode("utf8"))
        # Short pause kept from the original sample before returning.
        time.sleep(1)
        return None


if __name__ == "__main__":
    # Read the local image as bytes and base64-encode it for the "img" field.
    with open('1.jpg', 'rb') as f:
        encodestr = str(base64.b64encode(f.read()), 'utf-8')

    url_request = "https://ocrapi-subject.taobao.com/ocrservice/subject"
    body = {'img': encodestr}

    html = posturl(url_request, data=body)
    print(html)

正常返回示例:

{
//唯一id,用于问题定位
"sid": "3f54dde89670e40aabea33873a4a3766b3e47787fe23682795b31483015083e580461bb6",
//算法版本
"prism_version": "1.0.9",
//识别的文字块的数量,prism_wordsInfo数组大小
"prism_wnum": 0,
//识别的文字的具体内容
"prism_wordsInfo": [],
//页抬头信息
"page_title": "",
//页码
"page_id": 0,
//题型大类信息
"part_info": [
{
//题型标题
"part_title": "",
// 坐标,可能为多块
"pos_list": [
[
{
"x": 5,
"y": 3
},
{
"x": 696,
"y": 7
},
{
"x": 695,
"y": 161
},
{
"x": 5,
"y": 158
}
]
],
"subject_list": [
{
//在part中的序号
"index": 0,
//题目类型:0:选择题;1:填空题;2:阅读理解(阅读+问答选择);3:完型填空(阅读+选择);4:阅读填空(阅读+填空);5:问答题
"type": 0,
//整题置信度
"prob": 74,
//整题文本信息
"text": "2.下列三个集合:\\textcircled 1 A = \\left\\{ x | y = x ^ { 2 } + 1 \\right\\} ; \\textcircled 2 B = \\left\\{ y | y = x ^ { 2 } + 1 \\right) ; \\textcircled 3 C = \\left\\{ \\left( x , y \\right) | y = x ^ { 2 } + 1 \\right\\} . (1)它们是不是相同的集合? (2)它们各自的含义分别是什么?",
"pos_list": [
[
{
"x": 5,
"y": 3
},
{
"x": 696,
"y": 7
},
{
"x": 695,
"y": 161
},
{
"x": 5,
"y": 158
}
]
],
//题目元素
"element_list": [
{
//题目元素类型:0:题干;1:选项;2:解析;3:答案
"type": 0,
//元素的文本信息
"text": "2.下列三个集合:\\textcircled 1 A = \\left\\{ x | y = x ^ { 2 } + 1 \\right\\} ; \\textcircled 2 B = \\left\\{ y | y = x ^ { 2 } + 1 \\right) ; \\textcircled 3 C = \\left\\{ \\left( x , y \\right) | y = x ^ { 2 } + 1 \\right\\} . (1)它们是不是相同的集合? (2)它们各自的含义分别是什么?",
"pos_list": [
[
{
"x": 5,
"y": 3
},
{
"x": 696,
"y": 7
},
{
"x": 695,
"y": 161
},
{
"x": 5,
"y": 158
}
]
],
//内容
"content_list": [
{
//内容类型:0:图片;1:文本;2:公式
"type": 1,
//内容置信度
"prob": 97,
//如果为图片则为base64编码,公式则为LaTeX编码字符串
"string": "2.下列三个集合:",
//如果element-type为选项类型,则该字段标识content属于哪个选项
"option": "",
// 坐标,可能为多块
"pos": [
{
"x": 5,
"y": 7
},
{
"x": 220,
"y": 8
},
{
"x": 220,
"y": 33
},
{
"x": 5,
"y": 32
}
]
},
{
"type": 2,
"prob": 97,
"string": "\\textcircled 1 A = \\left\\{ x | y = x ^ { 2 } + 1 \\right\\} ; \\textcircled 2 B = \\left\\{ y | y = x ^ { 2 } + ",
"option": "",
"pos": [
{
"x": 220,
"y": 3
},
{
"x": 695,
"y": 4
},
{
"x": 695,
"y": 40
},
{
"x": 220,
"y": 39
}
]
},
{
"type": 2,
"prob": 97,
"string": " 1 \\right) ; \\textcircled 3 C = \\left\\{ \\left( x , y \\right) | y = x ^ { 2 } + 1 \\right\\} .",
"option": "",
"pos": [
{
"x": 40,
"y": 45
},
{
"x": 395,
"y": 47
},
{
"x": 394,
"y": 80
},
{
"x": 40,
"y": 78
}
]
},
{
"type": 1,
"prob": 99,
"string": "(1)它们是不是相同的集合?",
"option": "",
"pos": [
{
"x": 39,
"y": 93
},
{
"x": 382,
"y": 93
},
{
"x": 382,
"y": 118
},
{
"x": 39,
"y": 117
}
]
},
{
"type": 1,
"prob": 99,
"string": "(2)它们各自的含义分别是什么?",
"option": "",
"pos": [
{
"x": 39,
"y": 133
},
{
"x": 438,
"y": 136
},
{
"x": 438,
"y": 161
},
{
"x": 39,
"y": 158
}
]
}
]
}
]
}
]
}
]
}

失败返回示例:

{
"error_code": 400,
"error_msg": "img和url参数不能同时存在"
}

错误码定义:

错误码 | 错误信息 | 描述
400 | 参数错误 | 具体错误请参考返回的error_msg
401 | 您无该功能的权限,请开通后使用 | 您无该功能的权限,请开通后使用
403 | 购买的容量已用完或者签名错误 | 购买的容量已用完或者签名错误
500 | 服务器错误,请稍后重试 | 服务器错误,请稍后重试